#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          strigger --set (job options)
#
# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
#          "FAILURE: ..." otherwise with an explanation of the failure, OR
#          anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2007 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette <jette1@llnl.gov>
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
source ./globals

# Test identity and overall result; exit_code stays 0 unless a check fails.
set test_id       "19.5"
set exit_code     0

# Scratch files: the batch script, the two trigger programs, and the files
# the trigger programs redirect their squeue output into.
set file_in       "test${test_id}.input"
set file_in_time  "test${test_id}.time_input"
set file_in_fini  "test${test_id}.fini_input"
set file_out_time "test${test_id}.time_output"
set file_out_fini "test${test_id}.fini_output"

print_header $test_id

#
# Record the uid returned by stop_root_user, then clear any triggers that
# user still has registered so earlier runs cannot influence this one
#
set uid [stop_root_user]
exec $strigger --clear --quiet --user=$uid

#
# Build input script file and submit a job
#
# The batch script runs a single 60-second sleep under srun; the job is
# submitted with a 2-minute time limit (-t2) and its output discarded.
#
make_bash_script $file_in "$srun sleep 60"

set job_id 0
spawn $sbatch --output=/dev/null -t2 $file_in
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		# The spawned command is sbatch, so name it in the diagnostic.
		send_user "\nFAILURE: sbatch not responding\n"
		# NOTE(review): kill_srun comes from ./globals and is kept as-is;
		# presumably it kills the currently spawned process - confirm.
		kill_srun
		exit 1
	}
	eof {
		wait
	}
}
# job_id is still 0 if sbatch never printed a "Submitted batch job" line.
if {$job_id == 0} {
	send_user "\nFAILURE: batch submit failure\n"
	exit 1
}

#
# Now add a couple of triggers for that job
#
# Remove output left by a previous run so the later wait_for_file checks
# only succeed if the trigger programs really ran this time. $bin_rm is
# used (rather than a bare "rm") to match the cleanup at the end of the test.
#
exec $bin_rm -f $file_out_fini $file_out_time
set cwd "[$bin_pwd]"

# Program for the time trigger: snapshot the job's squeue line.
make_bash_script $file_in_time "
	$squeue -j $job_id >$cwd/$file_out_time"

# Program for the fini trigger: wait 5 seconds, then snapshot the job in
# any state (-tall), capturing stderr too so an squeue error is recorded.
make_bash_script $file_in_fini "
	$bin_sleep 5
	$squeue -j $job_id -tall >$cwd/$file_out_fini 2>&1"

#
# Register the time-based trigger (--time with --offset=-90) for the job.
# "permission denied" in the output means this user may not set triggers,
# in which case the test is abandoned with a warning rather than a failure.
#
set disabled 0
set matches  0
set strigger_pid [spawn $strigger --set -v --time --jobid=$job_id --offset=-90 --program=$cwd/$file_in_time]
expect {
	-re "permission denied" {
		set disabled 1
		exp_continue
	}
	-re "trigger set" {
		incr matches
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: strigger not responding\n"
		slow_kill $strigger_pid
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$disabled == 1} {
	send_user "\nWARNING: Current configuration prevents setting triggers\n"
	send_user "         Need to run as SlurmUser or make SlurmUser=root\n"
	cancel_job $job_id
	# Use $bin_rm, consistent with the cleanup at the end of the test.
	exec $bin_rm -f $file_in $file_in_fini $file_in_time
	exit $exit_code
}
# No "trigger set" confirmation was seen in strigger's verbose output.
if {$matches == 0} {
	send_user "\nFAILURE: trigger creation failure\n"
	set exit_code 1
}

#
# Register the job-finished trigger (--fini) and confirm that strigger
# reports "trigger set" at least once
#
set fini_trigger_ok 0
set fini_pid [spawn $strigger --set -v --fini --jobid=$job_id --program=$cwd/$file_in_fini]
expect {
	-re "trigger set" {
		set fini_trigger_ok 1
		exp_continue
	}
	eof {
		wait
	}
	timeout {
		send_user "\nFAILURE: strigger not responding\n"
		slow_kill $fini_pid
		set exit_code 1
	}
}
if {!$fini_trigger_ok} {
	send_user "\nFAILURE: trigger creation failure\n"
	set exit_code 1
}

#
# List the triggers registered for the job; the job id must show up at
# least twice in the output, once for each trigger set above
#
set id_hits 0
set get_pid [spawn $strigger --get -v --jobid=$job_id]
expect {
	-re $job_id {
		set id_hits [expr {$id_hits + 1}]
		exp_continue
	}
	eof {
		wait
	}
	timeout {
		send_user "\nFAILURE: strigger not responding\n"
		slow_kill $get_pid
		set exit_code 1
	}
}
if {$id_hits < 2} {
	send_user "\nFAILURE: trigger get failure\n"
	set exit_code 1
}

# A failure to reach RUNNING is recorded but the test carries on.
set start_rc [wait_for_job $job_id RUNNING]
if {$start_rc != 0} {
	send_user "\nFAILURE: error starting job $job_id\n"
	set exit_code 1
}

# The batch script sleeps for 60 seconds, so pause that long before
# polling for completion.
sleep 60

# Without a completed job the output checks below are meaningless, so a
# failure here cancels the job and aborts the test.
set done_rc [wait_for_job $job_id DONE]
if {$done_rc != 0} {
	send_user "\nFAILURE: error completing job $job_id\n"
	cancel_job $job_id
	exit 1
}
if {[wait_for_file $file_out_time] == 0} {
	# Pull the seconds field out of the squeue line that the time trigger
	# program captured; it stays 0 if no matching line is found.
	set elapsed_sec 0
	spawn $bin_cat $file_out_time
	expect {
		-re "R *0:($number)" {
			set elapsed_sec $expect_out(1,string)
			exp_continue
		}
		-re "CG *0:($number)" {
			set elapsed_sec $expect_out(1,string)
			set exit_code 1
			send_user "\nFAILURE: job unexpected found in completing state.\n"
			send_user "  This may be a sign of node failure and job requeue.\n"
			send_user "  Check for job state transistions through RUNNING,\n"
			send_user "  COMPLETING, PENDING and RUNNING again.\n"
			exp_continue
		}
		eof {
			wait
		}
	}
	# Trigger events are processed periodically, so the snapshot may be
	# taken up to about 20 seconds late; accept anything from 0:30 to 0:59.
	if {!($elapsed_sec >= 30 && $elapsed_sec <= 59)} {
		send_user "\nFAILURE: file $file_out_time contents are bad, run time\n"
		set exit_code 1
	}
} else {
	send_user "\nFAILURE: file $file_out_time is missing\n"
	set exit_code 1
}
if {[wait_for_file $file_out_fini] == 0} {
	# The fini trigger program should have recorded the job in the CD
	# state with a run time of one minute (1:NN). If Slurm is configured
	# to power down idle nodes the run may have taken a bit longer.
	set saw_completed 0
	set saw_purged    0
	spawn $bin_cat $file_out_fini
	expect {
		-re "Invalid job id specified" {
			set saw_purged 1
			exp_continue
		}
		-re "CD *1:($number)" {
			set saw_completed 1
			exp_continue
		}
		eof {
			wait
		}
	}
	if {!$saw_completed} {
		# MinJobAge is only looked up when squeue reported the job id as
		# invalid, i.e. the record was already gone when the trigger ran.
		set purged_too_soon 0
		if {$saw_purged} {
			set min_job_age [get_min_job_age]
			set purged_too_soon [expr {$min_job_age < 60}]
		}
		if {$purged_too_soon} {
			send_user "\nWARNING: MinJobAge ($min_job_age) configured too low "
			send_user "to capture job state after completion\n"
		} elseif {[test_power_save] == 0} {
			send_user "\nFAILURE: file $file_out_fini contents are bad, CD run time\n"
			set exit_code 1
		} else {
			send_user "\nWARNING: job timeout bad, possibly due to power save mode\n"
		}
	}
} else {
	send_user "\nFAILURE: file $file_out_fini is missing\n"
	set exit_code 1
}

# Always cancel the job. On failure the scratch files are left in place
# and the script exits with the recorded failure code.
cancel_job $job_id
if {$exit_code != 0} {
	exit $exit_code
}

# Success: remove the generated scripts and their output, then report.
exec $bin_rm -f $file_in $file_in_fini $file_in_time
exec $bin_rm -f $file_out_fini $file_out_time
send_user "\nSUCCESS\n"
exit $exit_code

